library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(rjson)
library(stringr)
First, I loaded the data from a GitHub repository run by Johns Hopkins University. The data is panel data that details the number of Covid-19 cases, deaths, and recoveries in a specified area. Each set of panel data corresponds to either US or global data. For this project, I used the US data, as I wanted to make a choropleth and the FIPS codes in the US data made the process easier.
# Load the US confirmed-case and death time series (one row per area, one
# column per date) from the Johns Hopkins CSSE GitHub repository.
# Both files live in the same directory, so the common prefix is built once.
jhu_series_dir <- paste0(
  "https://raw.github.com/CSSEGISandData/COVID-19/master/",
  "csse_covid_19_data/csse_covid_19_time_series/"
)
confirmed_US <- read.csv(
  paste0(jhu_series_dir, "time_series_covid19_confirmed_US.csv")
)
deaths_US <- read.csv(
  paste0(jhu_series_dir, "time_series_covid19_deaths_US.csv")
)
# Preview the first rows of the confirmed-case table.
head(confirmed_US)
## UID iso2 iso3 code3 FIPS Admin2 Province_State Country_Region
## 1 16 AS ASM 16 60 American Samoa US
## 2 316 GU GUM 316 66 Guam US
## 3 580 MP MNP 580 69 Northern Mariana Islands US
## 4 630 PR PRI 630 72 Puerto Rico US
## 5 850 VI VIR 850 78 Virgin Islands US
## 6 84001001 US USA 840 1001 Autauga Alabama US
## Lat Long_ Combined_Key X1.22.20 X1.23.20 X1.24.20
## 1 -14.27100 -170.13200 American Samoa, US 0 0 0
## 2 13.44430 144.79370 Guam, US 0 0 0
## 3 15.09790 145.67390 Northern Mariana Islands, US 0 0 0
## 4 18.22080 -66.59010 Puerto Rico, US 0 0 0
## 5 18.33580 -64.89630 Virgin Islands, US 0 0 0
## 6 32.53953 -86.64408 Autauga, Alabama, US 0 0 0
## X1.25.20 X1.26.20 X1.27.20 X1.28.20 X1.29.20 X1.30.20 X1.31.20 X2.1.20
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X2.2.20 X2.3.20 X2.4.20 X2.5.20 X2.6.20 X2.7.20 X2.8.20 X2.9.20 X2.10.20
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X2.11.20 X2.12.20 X2.13.20 X2.14.20 X2.15.20 X2.16.20 X2.17.20 X2.18.20
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X2.19.20 X2.20.20 X2.21.20 X2.22.20 X2.23.20 X2.24.20 X2.25.20 X2.26.20
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X2.27.20 X2.28.20 X2.29.20 X3.1.20 X3.2.20 X3.3.20 X3.4.20 X3.5.20 X3.6.20
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X3.7.20 X3.8.20 X3.9.20 X3.10.20 X3.11.20 X3.12.20 X3.13.20 X3.14.20 X3.15.20
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X3.16.20 X3.17.20 X3.18.20 X3.19.20 X3.20.20 X3.21.20 X3.22.20 X3.23.20
## 1 0 0 0 0 0 0 0 0
## 2 3 3 5 12 14 15 27 29
## 3 0 0 0 0 0 0 0 0
## 4 5 5 5 5 14 21 23 31
## 5 1 2 2 3 3 6 6 7
## 6 0 0 0 0 0 0 0 0
## X3.24.20 X3.25.20 X3.26.20 X3.27.20 X3.28.20 X3.29.20 X3.30.20 X3.31.20
## 1 0 0 0 0 0 0 0 0
## 2 32 37 45 51 55 56 58 69
## 3 0 0 0 0 0 0 0 2
## 4 39 51 64 79 100 127 174 239
## 5 17 17 17 19 22 23 30 30
## 6 1 4 6 6 6 6 6 7
## X4.1.20 X4.2.20 X4.3.20 X4.4.20 X4.5.20 X4.6.20 X4.7.20 X4.8.20 X4.9.20
## 1 0 0 0 0 0 0 0 0 0
## 2 77 82 84 93 112 113 121 121 128
## 3 6 6 8 8 8 8 8 11 11
## 4 286 316 316 452 475 513 573 620 683
## 5 30 30 37 40 42 43 43 45 45
## 6 8 10 12 12 12 12 12 12 15
## X4.10.20 X4.11.20 X4.12.20 X4.13.20 X4.14.20 X4.15.20 X4.16.20 X4.17.20
## 1 0 0 0 0 0 0 0 0
## 2 130 133 133 133 133 135 135 136
## 3 11 11 11 11 11 13 13 13
## 4 725 788 897 903 923 974 1043 1068
## 5 50 51 51 51 51 51 51 51
## 6 17 19 19 19 23 24 26 26
## X4.18.20 X4.19.20 X4.20.20 X4.21.20 X4.22.20 X4.23.20 X4.24.20 X4.25.20
## 1 0 0 0 0 0 0 0 0
## 2 136 136 136 136 136 139 141 141
## 3 14 14 14 14 14 14 14 14
## 4 1118 1213 1252 1298 1252 1416 1276 1307
## 5 53 53 53 53 54 54 54 55
## 6 25 26 28 30 32 33 36 36
## X4.26.20 X4.27.20 X4.28.20 X4.29.20 X4.30.20 X5.1.20 X5.2.20
## 1 0 0 0 0 0 0 0
## 2 141 141 141 141 145 145 145
## 3 14 14 14 14 14 14 14
## 4 1371 1389 1400 1433 1539 1575 1757
## 5 57 57 57 57 66 66 66
## 6 37 39 40 43 44 42 45
# Preview the first six rows of the deaths table (six is head()'s default).
head(deaths_US, n = 6L)
## UID iso2 iso3 code3 FIPS Admin2 Province_State Country_Region
## 1 16 AS ASM 16 60 American Samoa US
## 2 316 GU GUM 316 66 Guam US
## 3 580 MP MNP 580 69 Northern Mariana Islands US
## 4 630 PR PRI 630 72 Puerto Rico US
## 5 850 VI VIR 850 78 Virgin Islands US
## 6 84001001 US USA 840 1001 Autauga Alabama US
## Lat Long_ Combined_Key Population X1.22.20
## 1 -14.27100 -170.13200 American Samoa, US 55641 0
## 2 13.44430 144.79370 Guam, US 164229 0
## 3 15.09790 145.67390 Northern Mariana Islands, US 55144 0
## 4 18.22080 -66.59010 Puerto Rico, US 2933408 0
## 5 18.33580 -64.89630 Virgin Islands, US 107268 0
## 6 32.53953 -86.64408 Autauga, Alabama, US 55869 0
## X1.23.20 X1.24.20 X1.25.20 X1.26.20 X1.27.20 X1.28.20 X1.29.20 X1.30.20
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X1.31.20 X2.1.20 X2.2.20 X2.3.20 X2.4.20 X2.5.20 X2.6.20 X2.7.20 X2.8.20
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X2.9.20 X2.10.20 X2.11.20 X2.12.20 X2.13.20 X2.14.20 X2.15.20 X2.16.20
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X2.17.20 X2.18.20 X2.19.20 X2.20.20 X2.21.20 X2.22.20 X2.23.20 X2.24.20
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X2.25.20 X2.26.20 X2.27.20 X2.28.20 X2.29.20 X3.1.20 X3.2.20 X3.3.20 X3.4.20
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X3.5.20 X3.6.20 X3.7.20 X3.8.20 X3.9.20 X3.10.20 X3.11.20 X3.12.20 X3.13.20
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## X3.14.20 X3.15.20 X3.16.20 X3.17.20 X3.18.20 X3.19.20 X3.20.20 X3.21.20
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 1
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X3.22.20 X3.23.20 X3.24.20 X3.25.20 X3.26.20 X3.27.20 X3.28.20 X3.29.20
## 1 0 0 0 0 0 0 0 0
## 2 1 1 1 1 1 1 1 1
## 3 0 0 0 0 0 0 0 0
## 4 1 2 2 2 2 3 3 3
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## X3.30.20 X3.31.20 X4.1.20 X4.2.20 X4.3.20 X4.4.20 X4.5.20 X4.6.20 X4.7.20
## 1 0 0 0 0 0 0 0 0 0
## 2 1 2 3 3 4 4 4 4 4
## 3 0 0 1 1 1 1 1 1 2
## 4 6 8 11 12 15 18 20 21 23
## 5 0 0 0 0 0 0 1 1 1
## 6 0 0 0 0 0 0 0 0 1
## X4.8.20 X4.9.20 X4.10.20 X4.11.20 X4.12.20 X4.13.20 X4.14.20 X4.15.20
## 1 0 0 0 0 0 0 0 0
## 2 4 4 4 5 5 5 5 5
## 3 2 2 2 2 2 2 2 2
## 4 24 33 39 42 44 45 45 51
## 5 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1
## X4.16.20 X4.17.20 X4.18.20 X4.19.20 X4.20.20 X4.21.20 X4.22.20 X4.23.20
## 1 0 0 0 0 0 0 0 0
## 2 5 5 5 5 5 5 5 5
## 3 2 2 2 2 2 2 2 2
## 4 56 58 60 62 63 64 63 69
## 5 1 2 3 3 3 3 3 3
## 6 1 2 2 2 1 1 2 2
## X4.24.20 X4.25.20 X4.26.20 X4.27.20 X4.28.20 X4.29.20 X4.30.20 X5.1.20
## 1 0 0 0 0 0 0 0 0
## 2 5 5 5 5 5 5 5 5
## 3 2 2 2 2 2 2 2 2
## 4 77 83 84 84 86 86 92 94
## 5 3 3 4 4 4 4 4 4
## 6 2 2 2 3 4 4 4 3
## X5.2.20
## 1 0
## 2 5
## 3 2
## 4 95
## 5 4
## 6 3
I used GeoJSON data for the geographical features.
# County boundary GeoJSON published in plotly's datasets repository; it is
# parsed into a nested R list and passed to the choropleth traces as `geojson`.
url <- paste0(
  "https://raw.githubusercontent.com/plotly/datasets/master/",
  "geojson-counties-fips.json"
)
counties <- rjson::fromJSON(file = url)
The cleaning process was pretty straightforward. I added a new column holding the total number of cases and deaths in each county. Next, I had to fix the FIPS codes in the data.
# Return the most recent per-row count from a JHU time-series data frame.
#
# The JHU time series report running (cumulative) counts, so each date column
# already contains the total up to that day. Summing across the date columns
# would add the running totals together and greatly overstate the count; the
# total to date is simply the value in the latest date column.
#
# Date columns are found by name ("X<month>.<day>.<year>", as produced by
# read.csv from headers like 1/22/20) instead of by position, so the helper
# works for both tables (the deaths table carries an extra Population column)
# and is unaffected by columns added later, such as `totals` itself.
latest_count <- function(df) {
  date_cols <- grep("^X[0-9]+\\.[0-9]+\\.[0-9]+$", names(df), value = TRUE)
  stopifnot(length(date_cols) > 0)
  # Columns are in chronological order in the file, so the last one is newest.
  df[[date_cols[length(date_cols)]]]
}

confirmed_US$totals <- latest_count(confirmed_US)
deaths_US$totals <- latest_count(deaths_US)
Some states' FIPS codes are supposed to begin with a “0”; however, because the column is numeric, the leading “0”s were dropped. The resulting choropleth excluded all of those states (see the graph below).
# Geo layout for the map: USA scope, Albers USA projection, lakes in white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# Hover text is built locally: the `hover` column of confirmed_US has not been
# created at this point in the script, so referencing it here would yield NULL
# and the trace would silently have no hover text.
hover_text <- with(
  confirmed_US,
  paste(
    Admin2, "County, ", Province_State, "<br>",
    "Number of Confirmed Cases: ", totals
  )
)

# This figure intentionally uses the raw numeric FIPS column as `locations`
# to demonstrate the problem: codes that lost their leading zero do not match
# the GeoJSON feature ids, so those states are missing from the map.
fig <- plot_ly() %>%
  add_trace(
    type = "choropleth",
    geojson = counties,
    locations = confirmed_US$FIPS,
    z = confirmed_US$totals,
    text = hover_text,
    colorscale = "Viridis"
  ) %>%
  colorbar(title = "Number of Confirmed Cases") %>%
  layout(
    title = "Covid-19 Cases by US County<br>(note states missing data)",
    geo = g
  )
fig
We see that Alabama is one of the states that is missing data in the map. We can also look back at the output of the head() function above and see that the FIPS code for Autauga County, AL in our data is 1001. However, the actual FIPS code for Autauga County is 01001. The UID variable is the FIPS code preceded by the US country code (840). I converted the UID variable to character and removed the country code to use as the FIPS code.
# Rebuild zero-padded FIPS codes from UID.
# Step 1: store UID as character in both tables.
confirmed_US[["UID"]] <- as.character(confirmed_US[["UID"]])
deaths_US[["UID"]] <- as.character(deaths_US[["UID"]])

# Step 2: strip a leading "840" from UID; what remains is used as the
# map key. UIDs that do not start with "840" are left unchanged.
country_prefix <- "^840"
confirmed_US[["FIPS_map"]] <- str_remove(confirmed_US[["UID"]], country_prefix)
deaths_US[["FIPS_map"]] <- str_remove(deaths_US[["UID"]], country_prefix)
I used plotly to create both of the following maps. I added a hover tool that tells you the name of the county, the state, and the number of confirmed cases or deaths, depending on the particular map.
# Geo layout for the map: USA scope, Albers USA projection, lakes in white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# Per-row hover label: county, state, and the confirmed-case total.
confirmed_US$hover <- paste(
  confirmed_US$Admin2, "County, ", confirmed_US$Province_State, "<br>",
  "Number of Confirmed Cases: ", confirmed_US$totals
)

# Confirmed-case choropleth keyed on the repaired FIPS codes (FIPS_map).
fig1 <- plot_ly() %>%
  add_trace(
    type = "choropleth",
    geojson = counties,
    locations = confirmed_US$FIPS_map,
    z = confirmed_US$totals,
    text = confirmed_US$hover,
    colorscale = "Viridis"
  ) %>%
  colorbar(title = "Number of Confirmed Cases") %>%
  layout(title = "Covid-19 Cases by US County<br>(hover for breakdown)") %>%
  layout(geo = g)
fig1
# Geo layout for the map: USA scope, Albers USA projection, lakes in white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# Per-row hover label: county, state, and the death total.
deaths_US$hover <- with(
  deaths_US,
  paste(Admin2, "County, ", Province_State, "<br>", "Number of Deaths: ", totals)
)

# Deaths choropleth keyed on the repaired FIPS codes (FIPS_map).
fig2 <- plot_ly() %>%
  add_trace(
    type = "choropleth",
    geojson = counties,
    locations = deaths_US$FIPS_map,
    z = deaths_US$totals,
    # NOTE(review): 4,000,000 looks far above any county-level death count;
    # confirm this upper colour bound is intended. Presumably plotly only
    # honours zmax when zmin is also supplied -- TODO confirm.
    zmax = 4000000,
    text = deaths_US$hover,
    colorscale = "Viridis"
  ) %>%
  # Fixed the unbalanced parenthesis in the colorbar title.
  colorbar(title = "Number of Deaths<br>(hover for breakdown)") %>%
  layout(
    title = "Covid-19 Deaths by US County",
    geo = g
  )
fig2